### Rachel Porter
### 12/1/2022
### Replication file for Rep veterans' analysis

rm(list=ls())

# load libraries
library("cobalt")
library("WeightIt")
library("ebal")
library("jtools")
library("survey")
library("sensemakr")
library("stargazer")
library("MASS")
library("arm")
library("ggplot2")

# clear environment and set working directory
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")

# load data for male/female democrat analysis
master <- readRDS("vet_republican_data.rds")

# subset data into strategic/no groups for balancing
strategic <- subset(master, master$strategic == "Strategic")
not <- subset(master, master$strategic == "Not")

############################### NONVET PROFESSIONAL REP #################################

# main findings; Model 1 with weights, Table A15
w.out.strat <- weightit(military_number ~ quality_cand + openrace + rep_pres_av +
                          primary_type + year + vet_rescaled + installations + 
                          gender, data = strategic, estimand = "ATT", 
                          method = "ebal")
d.w.strat <- svydesign(ids = ~1, weights = w.out.strat$weights,
                          data = strategic)
fit.strat <- svyglm(narrow_military ~ military_number + quality_cand + openrace + 
                          rep_pres_av + primary_type + year + vet_rescaled + 
                          installations + gender, design = d.w.strat, 
                          family=quasibinomial())

# appendix Model 1 without weights, Table A15
fit2 <- glm(narrow_military ~ military_number + quality_cand + openrace + 
                          rep_pres_av + primary_type + year + vet_rescaled + 
                          installations + gender, data = strategic, 
                          family=binomial())

# appendix Model 2 with weights, Table A15
w.out.strat3 <- weightit(military_number ~ as.factor(qual_chall) + openrace + 
                           rep_pres_av + primary_type + year + vet_rescaled + 
                           installations + gender, data = strategic, 
                           estimand = "ATT", method = "ebal")
d.w.strat3 <- svydesign(ids = ~1, weights = w.out.strat3$weights, 
                           data = strategic)
fit.strat3 <- svyglm(narrow_military ~ military_number + as.factor(qual_chall) + 
                           openrace + rep_pres_av + primary_type + year + 
                           vet_rescaled + installations + gender, 
                           design = d.w.strat3, family=quasibinomial())

# appendix Model 2 without weights, Table A15
fit3 <- glm(narrow_military ~ military_number + as.factor(qual_chall) + 
                           openrace + rep_pres_av + primary_type + year + 
                           vet_rescaled + installations + gender, data = strategic, 
                           family=binomial())

# appendix Model 3, Table A15
w.out.strat2 <- weightit(military_number ~ quality_cand + openrace + ss_party + 
                           primary_type + year + vet_pop + installations + gender, 
                           data = strategic, estimand = "ATT", method = "ebal")
d.w.strat2 <- svydesign(ids = ~1, weights = w.out.strat2$weights,
                           data = strategic)
fit.strat2 <- svyglm(narrow_military ~ military_number + quality_cand + 
                           openrace + ss_party + primary_type + year + vet_pop + 
                           installations + gender, design = d.w.strat2, 
                           family=quasibinomial())

#################### 
#### APPENDIX TABLE A15
#################### 
stargazer(fit.strat, fit2, fit.strat3, fit3, fit.strat2,
                           star.char = c("*", "*"),
                           star.cutoffs = c(0.05, 0.01))

# Simulating predicted probabilities for Figure 5
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.strat$coef
vcv <- vcov(fit.strat)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.strat$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
           betas = betas, sim.sd = apply(sim.betas, 2, sd), 
           se = sqrt(diag(vcv)))   

# Set values for all other variables 
military_number <- c(0,1)
vet_rescaled <- c(-0.02,-0.02)
openrace1 <- c(0,0)
`primary_typeOpen Primary` <- c(1,1)
quality_cand1 <- c(1,1)
installations <- c(2,2)
rep_pres_av <- c(55,55)
year2020 <- c(0,0)
gender1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, military_number, quality_cand1, openrace1, 
                    rep_pres_av, `primary_typeOpen Primary`, year2020, 
                    vet_rescaled, installations, gender1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(military_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_strat <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_strat, prob = .025)
hi <- quantile(pe_strat, prob = .975)

vet_vector <- c(mean(pe_strat), lo, hi)

#################### 
## Love plots, APPENDIX FIGURE 11
#################### 

w.out.strat <- weightit(military_number ~ quality_cand + openrace + rep_pres_av +
                          primary_type + year + vet_rescaled + installations + 
                          gender, data = strategic, estimand = "ATT", 
                        method = "ebal")
w.out.strat2 <- weightit(military_number ~ quality_cand + openrace + rep_pres_av +
                          primary_type + year + vet_rescaled + installations + 
                          gender, data = strategic, estimand = "ATT", 
                        method = "cbps")

new.names.strat = c(quality_cand = "Experienced Candidate", openrace = "Open Seat", 
                    rep_pres_av = "District Presidential Vote", 
                    `primary_type_Open Primary` = "Open Primary", 
                    year_2020 = "2020 Primary Election", 
                    installations = "# Mil. Installations", 
                    vet_rescaled = "% District Veteran Pop.", 
                    gender = "Female Candidate")


love.plot(military_number ~ quality_cand + openrace + rep_pres_av + 
          primary_type + year + vet_rescaled + installations + gender,
          data = strategic, estimand = "ATT",
          stats = c("mean.diffs"),
          weights = list(w1 = get.w(w.out.strat),
                         w2 = get.w(w.out.strat2)),
          var.order = "unadjusted",
          abs = TRUE,
          line = FALSE, 
          title = NULL,
          size =  5,
          threshold = c(mean.diffs = .05,
                        ks = .05),
          var.names = new.names.strat,
          colors = c("black", "black", "black"),
          shapes = c("triangle filled", "circle filled", "square filled"),
          sample.names = c("Unweighted", "Entropy Balanced", "CBPS Weighted"),
          limits = list(mean.diffs = c(0, .55),
                        ks = c(0, .55)),
          wrap = 20, grid = FALSE,
          position = "top",
          themes = list(theme(text = element_text(size=20)),
                        theme(text = element_text(size=20))))

rm(list=setdiff(ls(), c("not", "vet_vector", "pe_strat")))

############################### NONVET AMATEUR REP #################################

# main findings; Model 1 with weights, Table A16
w.out.not <- weightit(military_number ~ openrace + rep_pres_av + primary_type + 
                      year + vet_rescaled + installations + gender,
                      data = not, estimand = "ATT", method = "ebal")
d.w.am <- svydesign(ids = ~1, weights = w.out.not$weights,
                      data = not)
fit.am <- svyglm(narrow_military ~ military_number + openrace + rep_pres_av + 
                      primary_type + year + vet_rescaled + installations + 
                      gender, design = d.w.am, family=quasibinomial())

# appendix Model 1 without weights, Table A16
fit2 <- glm(narrow_military ~ military_number + openrace + rep_pres_av + 
                      primary_type + year + vet_rescaled + installations + 
                      gender, data = not, family=binomial())

# appendix Model 2, Table A16
w.out.not2 <- weightit(military_number ~ openrace + ss_party + primary_type + 
                      year + vet_pop + installations + gender,
                      data = not, estimand = "ATT", method = "ebal")
d.w.am2 <- svydesign(ids = ~1, weights = w.out.not2$weights,
                      data = not)
fit.am2 <- svyglm(narrow_military ~ military_number + openrace + ss_party + 
                      primary_type + year + vet_pop + installations + gender,
                      design = d.w.am, family=quasibinomial())

#################### 
#### APPENDIX TABLE A16
#################### 
stargazer(fit.am, fit2, fit.am2,
          star.char = c("*", "*"),
          star.cutoffs = c(0.05, 0.01))

# Simulating predicted probabilities for Figure 5
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.am$coef
vcv <- vcov(fit.am)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.am$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
           betas = betas, sim.sd = apply(sim.betas, 2, sd), 
           se = sqrt(diag(vcv)))   

# Set values for all other variables 
military_number <- c(0,1)
openrace1 <- c(0,0)
vet_rescaled <- c(-0.02,-0.02)
`primary_typeOpen Primary` <- c(1,1)
year2020 <- c(0,0)
rep_pres_av <- c(55,55)
installations <- c(2,2)
gender1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, military_number, openrace1,rep_pres_av,
                    `primary_typeOpen Primary`, year2020, vet_rescaled, 
                    installations, gender1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute the pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(military_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_not <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_not, prob = .025)
hi <- quantile(pe_not, prob = .975)

vet_vector <- rbind.data.frame(vet_vector, c(mean(pe_not), lo, hi))
colnames(vet_vector) <- c("pe", "lo", "hi")

# running ttest discussed in manuscript
t.test(pe_strat, pe_not)

# saving simulations to create Figure 4
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")
saveRDS(vet_vector, file = "figure_5_rvet.rds")

#################### 
## Love plots, APPENDIX FIGURE 12
#################### 
w.out.not <- weightit(military_number ~ openrace + rep_pres_av + primary_type + 
                        year + vet_rescaled + installations + gender,
                      data = not, estimand = "ATT", method = "ebal")
w.out.not2 <- weightit(military_number ~ openrace + rep_pres_av + primary_type + 
                        year + vet_rescaled + installations + gender,
                      data = not, estimand = "ATT", method = "cbps")

new.names.not = c(openrace = "Open Seat", 
                  rep_pres_av = "District Presidential Vote", 
                  `primary_type_Open Primary` = "Open Primary", 
                  year_2020 = "2020 Primary Election", 
                  installations = "# Mil. Installations", 
                  vet_rescaled = "% District Veteran Pop.", 
                  gender = "Female Candidate")

love.plot(military_number ~ openrace + rep_pres_av + primary_type + year + 
            vet_rescaled + installations + gender, 
          data = not, estimand = "ATT",
          stats = c("mean.diffs"),
          weights = list(w1 = get.w(w.out.not),
                         w2 = get.w(w.out.not2)),
          var.order = "unadjusted",
          abs = TRUE,
          line = FALSE, 
          title = NULL,
          size =  5,
          threshold = c(mean.diffs = .05,
                        ks = .05),
          var.names = new.names.not,
          colors = c("black", "black", "black"),
          shapes = c("triangle filled", "circle filled", "square filled"),
          sample.names = c("Unweighted", "Entropy Balanced", "CBPS Weighted"),
          limits = list(mean.diffs = c(0, .55),
                        ks = c(0, .55)),
          wrap = 20, grid = FALSE,
          position = "top",
          themes = list(theme(text = element_text(size=20)),
                        theme(text = element_text(size=20))))
